Code
# targets::tar_load(params2$ss)
# ss<-eval(rlang::sym(params2$ss))# targets::tar_load(params2$ss)
# ss<-eval(rlang::sym(params2$ss))This technical guide covers how to analyze Illumina methylation Beadchips arrays commonly known as EPIC/450K methylation arrays. here some description about a methylation array and how it is prepeared…
The first step once you have your data is to load it to R in an appropriate format. The ´minfi´ packages provides read.metharray functions for this purpose. Also the functions read.metharray.sheet, read.metharray.exp can be used. Those are wrappers around read.metharray to make it easier for the user. Check this guideline or type vignette("minfi) on an R console for more info. In this case a built in function cnv.methyl::read.metharray.exp.par() is used which is a wrapper for minfi formula with parallelization to speed up this process a little bit.
In order to use any of this formulas we first need to generate the sample_sheet.
The sample sheet contains inforamtion about your samples and the experimental setup. It is mandatory to respect the column names in order to make the pipline work. ##It must contain the following columns: - Sample_Name - Basename
##Other recommended columns with technical details. Can be used to track and detect batch effects: - Project - Pool_ID - Sample_Plate - Sample_Well - Sample_Group - Sentrix_ID - Sentrix_Position
##Pheno columns: - Gender (will be predicted) - Type - Condition - Any other information
In the following table, you can take a look at the sample sheet used for this project.
#
# renv::install("circlize")
# renv::install("RColorBrewer")
# renv::install("DT")
# renv::install("ggplot2")
# renv::install("shiny")
# renv::install("shinyWidgets")
# renv::install("ggplot2")
# renv::install("data.table")
# renv::install("writexl")
library("circlize")========================================
circlize version 0.4.15
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/
If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
in R. Bioinformatics 2014.
This message can be suppressed by:
suppressPackageStartupMessages(library(circlize))
========================================
library("RColorBrewer")
library("DT")
library("ggplot2")
library("shiny")
Attaching package: 'shiny'
The following objects are masked from 'package:DT':
dataTableOutput, renderDataTable
library("shinyWidgets")
library("data.table")
library("writexl")
top_beta <- function(beta_values, n=1000){
sdv <- apply(beta_values, 1, sd)
top100 <- names(head(sort(sdv,decreasing=T), n))
beta_top100 <- beta_values[top100,]
return(beta_top100)
}dtable<-function(data){
DT::datatable(
{ data},
filter = 'top',
# selection = list(mode = 'multiple', selected = c(1:10), target = 'column', selectable = c(-2, -3)),
fillContainer = F,
# style = "bootstrap",
extensions = 'Buttons',
options = list(
paging = TRUE,
searching = TRUE,
fixedColumns = TRUE,
autoWidth = FALSE,
scrollX=TRUE,
digits=4,
ordering = TRUE,
dom = 'Bfrtip',
buttons = list(
list(
extend = "collection",
text = 'download entire dataset',
action = DT::JS("function ( e, dt, node, config ) {
Shiny.setInputValue('test', true, {priority: 'event'});
}")
),
'copy',
'csv',
'excel'
),
class = "display",
server=TRUE
),
) |> DT::formatRound(which(sapply(data,is.double)),4)
}myModal <- function() {
div(id = "test",
shiny::modalDialog(downloadButton("download1","Download data as csv"),
br(),
br(),
downloadButton("download2","Download data as excel"),
easyClose = TRUE, title = "Download Table")
)
}
renderDT<- function(data){
output$dtable <- DT::renderDataTable({
dtable(data)
})
shiny::observeEvent(input$test, {
print("hello")
showModal(myModal())
})
output$download1 <- shiny::downloadHandler(
filename = function() {
paste("data-", Sys.Date(), ".csv", sep="")
},
content = function(file) {
write.csv(data, file)
}
)
output$download2 <- shiny::downloadHandler(
filename = function() {
paste("data-", Sys.Date(), ".xlsx", sep="")
},
content = function(file) {
writexl::write_xlsx(data, file)
})
}dtable(data.table::as.data.table(ss))First of all let’s define some functions for the color scales our heatmap will use:
library(ComplexHeatmap)
library(circlize)
# Simple continuous palette:
col_fun = colorRamp2(c(-1,-0.1, 0.1, 1), c("blue","white","white","red"))
# Continuous, uses a predefined color palette or manual color vector
col_fn<- function(x,n=100,palette=viridis::cividis(n)){
colorRamp2( seq(min(x,na.rm = T),max(x, na.rm = T),length.out=n), palette)
}
# Continuous, monochrome breakpoints based on values distribution (quartiles):
col_fq <- function(x,probs=c(0,0.25,0.5,0.75,1),color ){
colorRamp2( quantile(x,probs=probs),
monochromeR::generate_palette("white",
blend_colour = color, n_colours = length(probs)
)
)
}
# Continuous, monochrome can manually set breakpoints:
col_fbp <- function(x,bp,color){
colorRamp2( bp,
monochromeR::generate_palette("white",
blend_colour = color, n_colours = length(bp)
)
)
}Here we define the function for the heat maps
library(ComplexHeatmap)
meth_heatmap <- function(samplesheet, bvals, annotation, idcol="rn" ){
require(ComplexHeatmap)
require(data.table)
data.table::setDT(annotation)
setkeyv(annotation,idcol)
# Annotation object for top annotation:
data.table::setDT(samplesheet)
# purity = samplesheet[,.SD,.SDcols=startsWith(names(samplesheet),"purity")]
ha_column = HeatmapAnnotation(
annotation_name_side = "left",
Type = anno_block(gp=gpar(fill= rainbow(n=length(unique(samplesheet$Type)))),
labels = unique(samplesheet$Type), #unique(samplesheet$Type),
labels_gp = gpar(col = "white", fontsize = 10)),
Condition = samplesheet$Condition,
# purity = unlist(purity),
col = #A list of named vectors were names = vector values and value = color.
list(
Condition = setNames(
palette.colors(length(unique(samplesheet$Condition)),
palette = "Dark"
),
unique(samplesheet$Condition)
),
# purity = col_fbp(x=samplesheet,bp=seq(min(purity),max(purity),length.out=5),color="green3"),
NULL
)
)
heat_list<-ComplexHeatmap::Heatmap(
matrix = bvals,
#Color:
col=col_fn(bvals),#col_fun, # Color defined in col_fun above
na_col="grey",
#Label:
heatmap_legend_param = list(
at = c(0, 0.5, 1),
# labels = c("hypo", 0, "hyper"),
title = paste0(expression(beta),"-vals"),
legend_height = unit(4, "cm"),
title_position = "leftcenter-rot"
),
#Rows:
show_row_names = F,
#row_title = "Amino acids",
row_names_side = "left",
#left_annotation = ha_boxplot,
# clustering_distance_rows = "manhattan",
#Columns:
show_column_names = TRUE,
column_names_side = "top",
column_title_side = "bottom",
column_names_max_height = unit(4, "cm"),
column_names_gp = gpar(fontsize = 9),
column_names_rot = 90,
cluster_columns=F,
column_split = samplesheet$Type, #factor(samplesheet$Type,levels = c("Case","Control")),
column_title = paste0(expression(beta)," values"),
#Annotation bar:
top_annotation = ha_column,
#Aspect ratios:
#column_dend_height=unit(4, "cm")
heatmap_width = unit(2, "npc"),
heatmap_height = unit(16, "cm"),
)
# Add methylation difference heatmap:
for (contrast in unique(annotation$Contrast))
{
DMPann <- annotation[ Contrast==contrast,]
# make sure to have one and only one bval for each cg probe, if more than one do mean:
setDT(DMPann)
setkeyv(DMPann,idcol)
# Remove all annotation but methylation difference in contrast and probeid:
mat<-DMPann[rownames(bvals) ,.SD,on=idcol,.SDcols=c("diff_meanMeth",idcol)]
# If more than one id (idcol) do the mean
mat<-mat[,.(bval=mean(diff_meanMeth)),by=idcol]
mat<-mat$bval
heat_list = heat_list + Heatmap(
matrix = mat,
name = contrast,
#Color:
col=col_fun,#col_fun, # Color defined in col_fun above
na_col="white",
#Legend:
heatmap_legend_param = list(
at = c(-1, -0.1, 0.1, 1),
# labels = c("hypo", "", "", "hyper"),
title = paste0(expression(beta),"diff"),
legend_height = unit(4, "cm"),
title_position = "leftcenter-rot"
),
show_heatmap_legend = ifelse(contrast == unique(annotation$Contrast)[1],T,F ),
#Columns:
show_column_names = TRUE,
column_names_side = "top",
column_names_max_height = unit(2, "cm"),
column_names_gp = gpar(fontsize = 9),
column_names_rot = 90,
heatmap_width = unit(2, "npc"),
)
}
# Add annotation
ann<-annotation[,.(V1=unique(Relation_to_Island)),by=idcol]
mat<-ann[rownames(bvals),V1,on=idcol]
heat_list<-heat_list +
Heatmap(
matrix = mat,
name = "Relation to island",
#Color:
col=setNames(palette.colors(length(unique(annotation$Relation_to_Island)),palette = "Set1"),sort(unique(annotation$Relation_to_Island))),#col_fun, # Color defined in col_fun above
#Legend:
show_heatmap_legend = TRUE,
show_column_names = TRUE,
column_names_side = "top",
column_names_max_height = unit(2, "cm"),
column_names_gp = gpar(fontsize = 9),
column_names_rot = 90,
heatmap_width = unit(1, "npc"),
)
} ss_PDC11 <- data.table::as.data.table(tar_read(ss_clean_PDC11_batch2))
betas_PDC11 <- tar_read(betas_PDC11_batch2)
colnames(betas_PDC11) <- ss_PDC11[colnames(betas_PDC11),Sample_Name,on="barcode"]
# Sort by type:
ss_PDC11 <- setorder(ss_PDC11,Type)
betas_PDC11<-betas_PDC11[,ss_PDC11$Sample_Name]A dataframe with differentially methylated probes and annotation.
library(data.table)
DMPann_PDC11<-tar_read(dmps_mod1_PDC11_batch2)
DMPann_PDC11<-data.table::setDT(DMPann_PDC11)
idcol<-"EPICv1_Loci"
setkeyv(DMPann_PDC11,idcol)
PROBEIDS <- intersect (rownames(betas_PDC11), DMPann_PDC11[[idcol]])
betas_PDC11 <- betas_PDC11[PROBEIDS,]
DMPann_PDC11 <- DMPann_PDC11[PROBEIDS,]
#Transform to data.table format to visualize and perform data manipulations
DMPann_PDC11<-data.table::setDT(DMPann_PDC11)heat_list_PDC11<-meth_heatmap(samplesheet = ss_PDC11, bvals = betas_PDC11, annotation = DMPann_PDC11, idcol = idcol)
ComplexHeatmap::draw(heat_list_PDC11, row_title = paste0("top ",NROW(betas_PDC11)," across-sample most variable sites"), row_title_gp = gpar(col = "darkblue"),
column_title = "PDC11 cell line Differentially Methylated Probes Heatmap", column_title_gp = gpar(fontsize = 16), merge_legend = TRUE)ss_H358 <- data.table::as.data.table(tar_read(ss_clean_H358))
betas_H358 <- tar_read(top_H358)
colnames(betas_H358) <- ss_H358[colnames(betas_H358),Sample_Name,on="barcode"]
# Sort by type:
setorder(ss_H358,Type)
betas_H358<-betas_H358[,ss_H358$Sample_Name]A dataframe with differentially methylated probes and annotation.
library(data.table)
DMPann_H358<-tar_read(dmps_mod1_H358)
DMPann_H358<-data.table::setDT(DMPann_H358)
idcol<-"Name"
setkeyv(DMPann_H358,idcol)
PROBEIDS <- intersect (rownames(betas_H358), DMPann_H358[[idcol]])
betas_H358 <- betas_H358[PROBEIDS,]
DMPann_H358 <- DMPann_H358[PROBEIDS,]
#Transform to data.table format to visualize and perform data manipulations
DMPann_H358<-data.table::setDT(DMPann_H358)heat_list_H358<-meth_heatmap(samplesheet = ss_H358, bvals = betas_H358, annotation = DMPann_H358, idcol = idcol)
ComplexHeatmap::draw(heat_list_H358, row_title = paste0("top ",NROW(betas_H358)," across-sample most variable sites"), row_title_gp = gpar(col = "darkblue"),
column_title = "H358 cell line Differentially Methylated Probes Heatmap", column_title_gp = gpar(fontsize = 16), merge_legend = TRUE)ss_H2009 <- data.table::as.data.table(tar_read(ss_clean_H2009))
betas_H2009 <- tar_read(top_H2009)
colnames(betas_H2009) <- ss_H2009[colnames(betas_H2009),Sample_Name,on="barcode"]
# Sort by type:
setorder(ss_H2009,Type)
betas_H2009<-betas_H2009[,ss_H2009$Sample_Name]A dataframe with differentially methylated probes and annotation.
library(data.table)
DMPann_H2009<-tar_read(dmps_mod1_H2009)
DMPann_H2009<-data.table::setDT(DMPann_H2009)
idcol<-"Name"
setkeyv(DMPann_H2009,idcol)
PROBEIDS <- intersect (rownames(betas_H2009), DMPann_H2009[[idcol]])
betas_H2009 <- betas_H2009[PROBEIDS,]
DMPann_H2009 <- DMPann_H2009[PROBEIDS,]
#Transform to data.table format to visualize and perform data manipulations
DMPann_H2009<-data.table::setDT(DMPann_H2009)heat_list_H2009<-meth_heatmap(samplesheet = ss_H2009, bvals = betas_H2009, annotation = DMPann_H2009, idcol = idcol)
ComplexHeatmap::draw(heat_list_H2009, row_title = paste0("top ",NROW(betas_H2009)," across-sample most variable sites"), row_title_gp = gpar(col = "darkblue"),
column_title = "H2009 cell line Differentially Methylated Probes Heatmap", column_title_gp = gpar(fontsize = 16), merge_legend = TRUE)There are many things you can do in order to change the plot size and aesthetics. I won’t go in detail but I think that getting the right plot size can be a bit tricky. So here is some advice on how to get it right. Here is a function to get the width and hight of your plot.
calc_ht_size = function(ht, unit = "inch") {
pdf(NULL)
ht = ComplexHeatmap::draw(ht)
w = ComplexHeatmap:::width(ht)
w = convertX(w, unit, valueOnly = TRUE)
h = ComplexHeatmap:::height(ht)
h = convertY(h, unit, valueOnly = TRUE)
dev.off()
c(w, h)
}size <- calc_ht_size(heat_list_PDC11)
pdf(paste0("PDC11_heatmap.pdf"), width = size[1], height = size[2])
heat_list_PDC11
dev.off()png
2
size <- calc_ht_size(heat_list_H2009)
pdf(paste0("H2009_heatmap.pdf"), width = size[1], height = size[2])
heat_list_H2009
dev.off()png
2
size <- calc_ht_size(heat_list_H358)
pdf(paste0("H358_heatmap.pdf"), width = size[1], height = size[2])
heat_list_H358
dev.off()png
2
When the number of rows is high the image gets compressed and we can loose some information. Also, sometimes we have different heatmaps with a different number of rows for each of them, which results on different aspect ratios. If we want to control the height of each row and keep it stable between plots we must consider: - By convention resolution in R is 96, which means 96 pixels fit on an inch. So if you don’t want to loose any line your cell height should be 1 pixel tall at least. 96 lines in 1 inch means each line is 0.26mm which is already very small, so don’t try to make it smaller.
library(ComplexHeatmap)
plotsizes=c(100,200)
y = NULL
for(nr in plotsizes) {
betas<-top_beta(betas_PDC11,nr)
ht = ComplexHeatmap::draw(meth_heatmap(ss_PDC11,betas,annotation = DMPann_PDC11,idcol = "EPICv1_Loci"),height=unit(1/96, "inch")*nr)
ht_height = sum(component_height(ht)) + unit(4, "mm")
ht_height = convertHeight(ht_height, "inch", valueOnly = TRUE)
y = c(y, ht_height)
}sizemod <- lm(y ~ plotsizes)
sizemod
Call:
lm(formula = y ~ plotsizes)
Coefficients:
(Intercept) plotsizes
2.43104 0.01042
Now we use the formula to control for the plot size, so we can make the plots from the different cell lines proportional in row height:
png(paste0("PDC11_heatmap_resized.png"),
width = 14,
height = sizemod$coefficients[2]*NROW(betas_PDC11) + sizemod$coefficients[1],
units = "in",
res = 96
)
ComplexHeatmap::draw(heat_list_PDC11, row_title = paste0("top ",NROW(betas_PDC11) ," across-sample EPICv1 most variable sites"), row_title_gp = gpar(col = "darkblue") , height = unit(1*1/96, "inch")*NROW(betas_PDC11), column_title = "PDC11 cell line Differentially Methylated Probes Heatmap", merge_legend = TRUE)
dev.off()
png(paste0("H358_heatmap_resized.png"),
width = 14,
height = sizemod$coefficients[2]*NROW(betas_H358) + sizemod$coefficients[1]+2,
units = "in",
res = 96
)
ComplexHeatmap::draw(heat_list_H358, row_title = paste0("top ",NROW(betas_H358) ," across-sample EPICv1 most variable sites"), row_title_gp = gpar(col = "darkblue") , height = unit(1*1/96, "inch")*NROW(betas_H358), column_title = "H358 cell line Differentially Methylated Probes Heatmap", merge_legend = TRUE)
dev.off()
png(paste0("H2009_heatmap_resized.png"),
width = 14,
height = sizemod$coefficients[2]*NROW(betas_H2009) + sizemod$coefficients[1],
units = "in",
res = 96
)
ComplexHeatmap::draw(heat_list_H2009, row_title = paste0("top ",NROW(betas_H2009) ," across-sample EPICv1 most variable sites"), row_title_gp = gpar(col = "darkblue") , height = unit(1*1/96, "inch")*NROW(betas_H2009), column_title = "H2009 cell line Differentially Methylated Probes Heatmap", merge_legend = TRUE)
dev.off()The objective now is to generate 4 plots.
Individual plots:
Combined plot:
top_beta <- function(beta_values, n=1000){
sdv <- apply(beta_values, 1, sd)
topn <- names(head(sort(sdv,decreasing=T), n))
beta_topn <- beta_values[topn,]
return(beta_topn)
}Load the data:
ss_H2009 <- readRDS("data/ss_H2009.rds")[-1,]
betas_H2009 <- readRDS("data/betas_H2009.rds")
DMPann_H2009 <- readRDS("data/DMPann_H2009.rds")
idcol<-"Name"Apply the function to select the top 1000 most variable sites:
n = 1000 # change this value to select a different number of probes
betas_H2009_heat <- top_beta(betas_H2009,n=n)Generate the plot using the functions defined in the Section 0.3 section:
heat_list_H2009 <- meth_heatmap(samplesheet = ss_H2009, bvals = betas_H2009_heat, annotation = DMPann_H2009, idcol = idcol)Save with the desired resolution and titles:
library(ComplexHeatmap)
library(circlize)
ComplexHeatmap::draw(heat_list_H2009, row_title = paste0("top ",n ," across-sample most variable sites"), row_title_gp = gpar(col = "darkblue") , height = unit(1*1/96, "inch")*n, column_title = paste0( "H2009 cell line top ", n, " most variable Probes Heatmap"), merge_legend = TRUE)First we need to load the data:
ss_H358 <- readRDS("data/ss_H358.rds")[-1,]
betas_H358 <- readRDS("data/betas_H358.rds")
DMPann_H358 <- readRDS("data/DMPann_H358.rds")
idcol<-"Name"Then apply the function to select the top 1000 most variable sites:
n = 1000 # change this value to select a different number of probes
betas_H358_heat <- top_beta(betas_H358,n=n)Now we can generate the plot this using the functions defined Section 0.3:
heat_list_H358 <- meth_heatmap(samplesheet = ss_H358, bvals = betas_H358_heat, annotation = DMPann_H358, idcol = idcol)And finally save with the desired resolution and titles:
library(ComplexHeatmap)
library(circlize)
ComplexHeatmap::draw(heat_list_H358, row_title = paste0("top ",n ," across-sample most variable sites"), row_title_gp = gpar(col = "darkblue") , height = unit(1*1/96, "inch")*n, column_title = paste0( "H358 cell line top ", n, " most variable Probes Heatmap"), merge_legend = TRUE)First we need to load the data:
ss_PDC11_batch2 <- readRDS("data/ss_PDC11_batch2.rds")[-1,]
betas_PDC11_batch2 <- readRDS("data/betas_PDC11_batch2.rds")
DMPann_PDC11_batch2 <- readRDS("data/DMPann_PDC11_batch2.rds")In the idcol variable, we store the name of the probe that contains the methylation measure for a specific cg site. These probe names vary for EPIC v2 arrays but remain consistent for 450k and EPIC arrays. However, in the case of EPIC v2 arrays, we can utilize the information in the EPICv1_Loci column, which contains the equivalent cg site ID if available from the EPIC v1 array. This allows us to compare cg sites across different array types.
idcol<-"EPICv1_Loci"Then apply the function to select the top 1000 most variable sites:
n = 1000 # change this value to select a different number of probes
betas_PDC11_batch2_heat <- top_beta(betas_PDC11_batch2,n=n)Now we can generate the plot this using the functions defined Section 0.3:
heat_list_PDC11_batch2 <- meth_heatmap(samplesheet = ss_PDC11_batch2, bvals = betas_PDC11_batch2_heat, annotation = DMPann_PDC11_batch2, idcol = idcol)And finally save with the desired resolution and titles:
library(ComplexHeatmap)
library(circlize)
ComplexHeatmap::draw(heat_list_PDC11_batch2, row_title = paste0("top ",n ," across-sample most variable sites"), row_title_gp = gpar(col = "darkblue") , height = unit(1*1/96, "inch")*n, column_title = paste0( "PDC11_batch2 cell line top ", n, " most variable Probes Heatmap"), merge_legend = TRUE)In this case we are going to combine the 3 cell lines into the same plot so we can compare between cell lines. In order to see differences between the cell lines we could choose one of these options:
Option A: Use the top variable sites across all samples (same approach as before but using all samples now)
- Identify Top Variable Sites:
Calculate variability for each site across all samples.
Select the top variable sites based on this calculation.
- Generate Combined Plot:
Plot the selected top variable sites.
Use different colors or symbols for each cell line.
Option B: Merge the top 1000 sites from each cell line
- Use the Top 1000 Sites for each Cell Line we have calculated above.
- Merge into a Combined Pool.
- Generate Combined Plot.
In this case we are going to choose option B, but I am sure you will be able to reproduce the steps to make option A 😉.
Combine the top1000 sites for each cell line into a single set of probes:
l_betas_ids <- list(rownames(betas_PDC11_batch2_heat),rownames(betas_H358_heat),rownames(betas_H2009_heat))
common_cgsites <- Reduce(union,l_betas_ids)
length(common_cgsites)[1] 2888
Trim the beta values to contain only those sites (if present) for all the cell lines:
library(data.table)
l_betas <- list(betas_PDC11_batch2, betas_H358, betas_H2009)
betas_trimmed <- lapply(l_betas,function(x){
dt <- as.data.table(x,keep.rownames = "id")
dt <- dt[common_cgsites,,on="id"]
return(dt)
})Combine the beta values for all samples in a single object:
common_betas <- Reduce(function(x,y)merge(x,y,by = 'id',all=T), betas_trimmed)
b<- as.matrix(common_betas[,-1])
rownames(b)<-common_betas$id
common_betas <- b[complete.cases(b),]A total of 597 probes, out of the 2291 available, are absent for certain cell lines. This makes it impossible for the distance calculation algorithm to work since all values within the grouping factor are missing. To adress this issue this probes have been removed.
Combine annotation and samplesheet:
# Add arraytype in all sample sheets:
ss_H2009[,arraytype:="EPIC"]
ss_H358$arraytype <- "EPIC"
common_ss <- rbindlist(list(ss_PDC11_batch2,ss_H2009,ss_H358),use.names=TRUE)
# Make idcol consistent for EPICv2:
DMPann_PDC11_batch2$Name <- DMPann_PDC11_batch2$EPICv1_Loci
common_annotation <- rbindlist(list(DMPann_PDC11_batch2,DMPann_H2009,DMPann_H358),use.names=TRUE, fill=TRUE)
colnames(common_betas) <- common_ss[colnames(b),Sample_Name,on="barcode"]Modify the heatmap functions (#**):
library(ComplexHeatmap)
meth_heatmap2 <- function(samplesheet, bvals, annotation, idcol="rn" ){
require(ComplexHeatmap)
require(data.table)
data.table::setDT(annotation)
setkeyv(annotation,idcol)
# Annotation object for top annotation:
data.table::setDT(samplesheet)
# purity = samplesheet[,.SD,.SDcols=startsWith(names(samplesheet),"purity")]
ha_column = HeatmapAnnotation(
annotation_name_side = "left",
CL = anno_block( gp=gpar( fill=RColorBrewer::brewer.pal(n=length(unique(samplesheet$CL)) , name = "Accent")),
labels = as.character(unique(samplesheet$CL)), #unique(samplesheet$Type), #**
labels_gp = gpar(col = "black", fontsize = 11)), #**
Type = samplesheet$Type,
Condition = samplesheet$Condition,
# purity = unlist(purity),
col = #A list of named vectors were names = vector values and value = color.
list(
Condition = setNames(
palette.colors(length(unique(samplesheet$Condition)),
palette = "Dark"
),
unique(samplesheet$Condition)
),
Type = setNames(
rainbow(n=length(unique(samplesheet$Type))),
unique(samplesheet$Type)
),
# purity = col_fbp(x=samplesheet,bp=seq(min(purity),max(purity),length.out=5),color="green3"),
NULL
)
)
heat_list<-ComplexHeatmap::Heatmap(
matrix = bvals,
#Color:
col=col_fn(bvals),#col_fun, # Color defined in col_fun above
na_col="grey",
#Label:
heatmap_legend_param = list(
at = c(0, 0.5, 1),
# labels = c("hypo", 0, "hyper"),
title = paste0(expression(beta),"-vals"),
legend_height = unit(4, "cm"),
title_position = "leftcenter-rot"
),
#Rows:
show_row_names = F,
#row_title = "Amino acids",
row_names_side = "left",
#left_annotation = ha_boxplot,
# clustering_distance_rows = "manhattan",
#Columns:
show_column_names = TRUE,
column_names_side = "top",
column_title_side = "bottom",
column_names_max_height = unit(4, "cm"),
column_names_gp = gpar(fontsize = 9),
column_names_rot = 90,
cluster_columns=F,
column_split = samplesheet$CL, #factor(samplesheet$Type,levels = c("Case","Control")), #**
column_title = paste0(expression(beta)," values"),
#Annotation bar:
top_annotation = ha_column,
#Aspect ratios:
#column_dend_height=unit(4, "cm")
heatmap_width = unit(2, "npc"),
heatmap_height = unit(16, "cm")
)
ann<-annotation[,.(V1=unique(Relation_to_Island)),by=idcol]
mat<-ann[rownames(bvals),,on=idcol][!duplicated(Name),V1] #**
heat_list<-heat_list +
Heatmap(
matrix = mat,
name = "Relation to island",
#Color:
col=setNames(palette.colors(length(unique(annotation$Relation_to_Island)),palette = "Set1"),sort(unique(annotation$Relation_to_Island))),#col_fun, # Color defined in col_fun above
#Legend:
show_heatmap_legend = TRUE,
show_column_names = TRUE,
column_names_side = "top",
column_names_max_height = unit(2, "cm"),
column_names_gp = gpar(fontsize = 9),
column_names_rot = 90,
heatmap_width = unit(1, "npc"),
)
} Generate the heatmap:
heat_list_common <- meth_heatmap2(samplesheet = common_ss, bvals = common_betas, annotation = common_annotation, idcol = "Name")`use_raster` is automatically set to TRUE for a matrix with more than
2000 rows. You can control `use_raster` argument by explicitly setting
TRUE/FALSE to it.
Set `ht_opt$message = FALSE` to turn off this message.
'magick' package is suggested to install to give better rasterization.
Set `ht_opt$message = FALSE` to turn off this message.
Adjust dimensions:
library(ComplexHeatmap)
plotsizes=c(100,200)
y = NULL
for(nr in plotsizes) {
betas<-top_beta(betas_PDC11,nr)
ht = ComplexHeatmap::draw(meth_heatmap2(samplesheet = common_ss, bvals = common_betas, annotation = common_annotation, idcol = "Name"),height=unit(1/96, "inch")*nr)
ht_height = sum(component_height(ht)) + unit(4, "mm")
ht_height = convertHeight(ht_height, "inch", valueOnly = TRUE)
y = c(y, ht_height)
}
sizemod <- lm(y ~ plotsizes)
sizemodDraw and save:
png(paste0("Combined_heatmap_resized.png"),
width = 14,
height = sizemod$coefficients[2]*NROW(common_betas) + sizemod$coefficients[1],
units = "in",
res = 96
)
ComplexHeatmap::draw(heat_list_common,
row_title = paste0("cg Sites"),
row_title_gp = gpar(col = "darkblue"),
height = unit(1*1/96, "inch")*NROW(common_betas),
column_title = paste0( "Combined Cell Lines: Top 1000 Most Variable Sites from each Cell Line"),
merge_legend = TRUE
)
dev.off()::: {#fig-comb layout-nrow=“1”}